#Notes ## Notes about phenotyping csv file generation Uninfected phenotyping data was generated via flow cytometry gating to cell subset from alive cells. Infected phenotyping data was generated by gating to alive, then to only IAV+ cells (HA or NP positive), then to cell subset.
A “round” indicates an independent differentiation of NHBE cells.
Channel value csv files for each manually gated population were exported from FlowJo. A column was manually added that indicates what gated population the events belong to. Uninfected cells were gated from the alive population, infected cells were gated from the alive, then HA and/or NP positive populations. All csv files corresponding to the same well (N=3 wells each for infected and uninfected wells) were concatenated and saved as a single csv file.
library(ggplot2)
library(Spectre)
library(Rmisc)
## Loading required package: lattice
## Loading required package: plyr
library(forcats)
library(vegan)
## Loading required package: permute
## This is vegan 2.6-4
library(agricolae)
## Registered S3 methods overwritten by 'klaR':
## method from
## predict.rda vegan
## print.rda vegan
## plot.rda vegan
library(factoextra)
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
##
## Attaching package: 'factoextra'
## The following object is masked from 'package:agricolae':
##
## hcut
library(readxl)
library(readr)
library(ggalluvial)
library(tidyr)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:plyr':
##
## arrange, count, desc, failwith, id, mutate, rename, summarise,
## summarize
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggpubr)
##
## Attaching package: 'ggpubr'
## The following object is masked from 'package:plyr':
##
## mutate
library(broom)
colors <- c("#1080FF", "#8000FF", "#FB02FF", "#FD8008", "#FECC66", "#FC6FCF", "#FFFF0A", "#FFFFFF", "#108080")
setwd("/Users/sheph085/Documents/postdoc_projects/FN_nHTBE/Manuscripts/Code/scripts")
Shows bar plot comparing uninfected cell type frequencies between bronchial (NHBE) and nasal (NHNE) primary cells. Preinfection phenotyping data from round 7 was used to generate the bar plots comparing NHBE and NHNE in Figure 1A.
#CSV file contains all the infected and uninfected phenotyping data included in the paper:
#Uninfected phenotyping data includes 7 independent differentiations (rounds 3-9)
#Cal/07/09 infection (MOI=0.5) and phenotyping data was generated for rounds 3,4,7, and 9
#Round 3 includes an additional Cal/07/09 infection done at MOI=1.
#Read in data and calculate average and standard deviation of cell subset frequencies for each round/week/moi/condition combination.
phenotyping_dat <- read.csv("../data/pre_and_post_infection_phenotyping_data.csv", stringsAsFactors = TRUE, header = TRUE) %>%
group_by(round, week, moi, infection, cell_origin, condition) %>%
mutate(round = factor(round),
week = factor(week, levels = c("0", "1", "2", "3")),
cell_type = factor(cell_type, levels = c("Ciliated", "Secretory", "Basal", "Triple Negative", "CD66c+ Other", "CD66c- Other", "Goblet", "CD271+ Ciliated", "CD66c+ Basal")),
sample_id = cur_group_id()) %>%
group_by(sample_id, round, week, moi, infection, cell_origin, condition, cell_type) %>%
dplyr::summarise(avg_freq_of_alive = mean(frequency_of_alive),
sd = sd(frequency_of_alive)) %>%
mutate(SDpos = rev(cumsum(rev(avg_freq_of_alive))))
## `summarise()` has grouped output by 'sample_id', 'round', 'week', 'moi',
## 'infection', 'cell_origin', 'condition'. You can override using the `.groups`
## argument.
#Each combination of a round, week, "moi" and treatment consists of 2-4 culture wells (technical replicates).
#To get error bars to be placed correctly on bar graph, some additional data wrangling is required as well.
uninfected_dat <- phenotyping_dat %>%
filter(infection == "uninfected" & moi == 0.5)
#Just NHBE and NHNE
uninfected_dat %>%
mutate(week = factor(week),
condition_origin = paste0(condition, "-", cell_origin),
condition_origin = factor(condition_origin, levels = c("pneumacult-nhbe", "il13_low-nhbe", "il13_high-nhbe", "dapt-nhbe","ifn-nhbe", "lonza-nhbe", "pneumacult-nhne", "il13_low-nhne", "il13_high-nhne", "dapt-nhne"))) %>%
filter(round %in% c("7"),
week %in% c("3"),
condition == "pneumacult") %>%
droplevels() %>%
ggplot(aes(x = condition_origin, y = avg_freq_of_alive, fill = cell_type)) +
geom_bar(stat = "identity", color = "black") +
geom_errorbar(aes(ymin = SDpos-sd, ymax = SDpos+sd), width = 0.2, position = "identity") +
scale_fill_manual(values = colors) +
scale_y_continuous(limits = c(0,100), expand = c(0,0)) +
ggtitle("NHBE vs NHNE pre-infection treatment comparisons\nRound 7") +
labs(y = "Average frequency of alive",
x = NULL,
fill = "Cell type",
caption = "Error bars show standard deviation of 2-4 wells per treatment/round") +
theme_bw() +
theme(axis.text.x = element_text(angle = 45, hjust = 1),
panel.grid = element_blank())
Shows UMAP of flow cytometry data. Flow cytometry channel value data from round 7 Pneumacult treated cells was used to generate UMAPs displaying the manually gated populations and marker staining. The UMAP is a combination of uninfected cells (n=3 wells) and infected cells (n=3 wells). Figure 1B in the manuscript only shows the uninfected cells, but both are used to generate the UMAP in order to integrate the cluster positions for comparison with figure 2A.
#Read flowjo metadata
meta <- read.csv("../data/figure_1_metadata.csv", header = TRUE)
#Import csv files
data.list <- Spectre::read.files(file.loc = "../data/round_7_channel_value_files/", file.type = ".csv")
## Loading required package: data.table
##
## Attaching package: 'data.table'
## The following objects are masked from 'package:dplyr':
##
## between, first, last
#Merge data
cell.dat <- Spectre::do.merge.files(dat = data.list)
cell.dat
## FSC-A FSC-H FSC-W SSC-A SSC-H SSC-W SiR-Tub Cal NP Cal HA CD66c TSPAN8
## 1: 72 65 384 78 65 353 184 153 205 243 151
## 2: 92 82 394 64 56 324 174 147 213 198 164
## 3: 100 82 416 64 53 343 195 154 199 166 145
## 4: 72 45 470 112 80 409 239 140 235 275 204
## 5: 70 52 420 93 76 367 194 150 236 263 172
## ---
## 185490: 74 51 438 64 42 407 218 143 220 187 147
## 185491: 176 143 446 40 34 314 138 150 192 223 148
## 185492: 68 54 422 103 77 400 216 145 228 233 189
## 185493: 159 133 433 48 36 368 152 146 209 262 156
## 185494: 72 50 431 65 51 349 145 146 205 256 155
## CD271 Comp-UV Blue L_D-A Time cell_type FileName
## 1: 549 226 0 Basal Round_7_Pneumacult_1
## 2: 549 214 0 Basal Round_7_Pneumacult_1
## 3: 579 182 0 Basal Round_7_Pneumacult_1
## 4: 543 257 0 Basal Round_7_Pneumacult_1
## 5: 542 209 0 Basal Round_7_Pneumacult_1
## ---
## 185490: 161 306 995 Triple negative Round_7_Pneumacult_Cal_6
## 185491: 147 249 995 Triple negative Round_7_Pneumacult_Cal_6
## 185492: 158 358 996 Triple negative Round_7_Pneumacult_Cal_6
## 185493: 138 226 996 Triple negative Round_7_Pneumacult_Cal_6
## 185494: 138 238 996 Triple negative Round_7_Pneumacult_Cal_6
## FileNo
## 1: 1
## 2: 1
## 3: 1
## 4: 1
## 5: 1
## ---
## 185490: 6
## 185491: 6
## 185492: 6
## 185493: 6
## 185494: 6
#Add metadata
sample.info <- meta[,c(1:4, 6)]
cell.dat <- do.add.cols(cell.dat, "FileName", sample.info, "FileName", rmv.ext = TRUE)
## Removing '.csv' or '.fcs' extension
## Step 1/3. Mapping data
## Step 2/3. Merging data
## Step 3/3. Returning data
#Define what markers to use to cluster cells (all markers except for HA and NA: SiR-Tub, CD66c, TSPAN8, CD271)
cluster.cols <- names(cell.dat)[c(7,10:12)]
#Define variables that contain metadata
exp.name <- "Round 7"
sample.col <- "sample"
treatment.col <- "treatment"
condition.col <- "condition"
data.frame(table(cell.dat[[condition.col]])) # Check number of cells per infection condition.
## Var1 Freq
## 1 infected 87512
## 2 uninfected 97982
#Define target for subsampling. We have 87,000 cells in all uninfected and ~98,000 cells in all infected conditions. Here we will use 20,000 cells per target for graphing (but all cells are used to generate the clusters).
sub.targets <- c(20000, 20000)
#Run flowSOM clustering algorithm on full dataset, generate 9 clusters (same number that we have in our dataset by manual gating)
clustered.dat <- run.flowsom(cell.dat, cluster.cols, meta.k = 9)
## Loading required package: Biobase
## Loading required package: BiocGenerics
##
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:dplyr':
##
## combine, intersect, setdiff, union
## The following objects are masked from 'package:stats':
##
## IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
##
## anyDuplicated, aperm, append, as.data.frame, basename, cbind,
## colnames, dirname, do.call, duplicated, eval, evalq, Filter, Find,
## get, grep, grepl, intersect, is.unsorted, lapply, Map, mapply,
## match, mget, order, paste, pmax, pmax.int, pmin, pmin.int,
## Position, rank, rbind, Reduce, rownames, sapply, setdiff, sort,
## table, tapply, union, unique, unsplit, which.max, which.min
## Welcome to Bioconductor
##
## Vignettes contain introductory material; view with
## 'browseVignettes()'. To cite Bioconductor, see
## 'citation("Biobase")', and for packages 'citation("pkgname")'.
## Loading required package: FlowSOM
## Loading required package: igraph
##
## Attaching package: 'igraph'
## The following objects are masked from 'package:BiocGenerics':
##
## normalize, path, union
## The following objects are masked from 'package:dplyr':
##
## as_data_frame, groups, union
## The following object is masked from 'package:tidyr':
##
## crossing
## The following object is masked from 'package:agricolae':
##
## similarity
## The following object is masked from 'package:vegan':
##
## diversity
## The following object is masked from 'package:permute':
##
## permute
## The following objects are masked from 'package:stats':
##
## decompose, spectrum
## The following object is masked from 'package:base':
##
## union
## Thanks for using FlowSOM. From version 2.1.4 on, the scale
## parameter in the FlowSOM function defaults to FALSE
## Preparing data
## Starting FlowSOM
## Building SOM
## Mapping data to SOM
## Building MST
## Binding metacluster labels to starting dataset
## Binding cluster labels to starting dataset
clustered.dat
## FSC-A FSC-H FSC-W SSC-A SSC-H SSC-W SiR-Tub Cal NP Cal HA CD66c TSPAN8
## 1: 72 65 384 78 65 353 184 153 205 243 151
## 2: 92 82 394 64 56 324 174 147 213 198 164
## 3: 100 82 416 64 53 343 195 154 199 166 145
## 4: 72 45 470 112 80 409 239 140 235 275 204
## 5: 70 52 420 93 76 367 194 150 236 263 172
## ---
## 185490: 74 51 438 64 42 407 218 143 220 187 147
## 185491: 176 143 446 40 34 314 138 150 192 223 148
## 185492: 68 54 422 103 77 400 216 145 228 233 189
## 185493: 159 133 433 48 36 368 152 146 209 262 156
## 185494: 72 50 431 65 51 349 145 146 205 256 155
## CD271 Comp-UV Blue L_D-A Time cell_type FileName
## 1: 549 226 0 Basal Round_7_Pneumacult_1
## 2: 549 214 0 Basal Round_7_Pneumacult_1
## 3: 579 182 0 Basal Round_7_Pneumacult_1
## 4: 543 257 0 Basal Round_7_Pneumacult_1
## 5: 542 209 0 Basal Round_7_Pneumacult_1
## ---
## 185490: 161 306 995 Triple negative Round_7_Pneumacult_Cal_6
## 185491: 147 249 995 Triple negative Round_7_Pneumacult_Cal_6
## 185492: 158 358 996 Triple negative Round_7_Pneumacult_Cal_6
## 185493: 138 226 996 Triple negative Round_7_Pneumacult_Cal_6
## 185494: 138 238 996 Triple negative Round_7_Pneumacult_Cal_6
## FileNo sample round treatment condition FlowSOM_cluster
## 1: 1 1_PC 7 PC uninfected 177
## 2: 1 1_PC 7 PC uninfected 177
## 3: 1 1_PC 7 PC uninfected 192
## 4: 1 1_PC 7 PC uninfected 149
## 5: 1 1_PC 7 PC uninfected 164
## ---
## 185490: 6 6_PC 7 PC infected 155
## 185491: 6 6_PC 7 PC infected 141
## 185492: 6 6_PC 7 PC infected 155
## 185493: 6 6_PC 7 PC infected 141
## 185494: 6 6_PC 7 PC infected 141
## FlowSOM_metacluster
## 1: 8
## 2: 8
## 3: 8
## 4: 8
## 5: 8
## ---
## 185490: 5
## 185491: 5
## 185492: 5
## 185493: 5
## 185494: 5
#Create subsampled dataset for UMAP visualization
cell.sub <- do.subsample(clustered.dat, sub.targets, treatment.col)
cell.sub
## FSC-A FSC-H FSC-W SSC-A SSC-H SSC-W SiR-Tub Cal NP Cal HA CD66c TSPAN8
## 1: 103 57 477 158 114 447 384 164 279 492 201
## 2: 575 306 710 718 394 693 784 134 347 625 219
## 3: 137 105 437 88 74 364 197 157 459 262 140
## 4: 70 63 383 77 67 345 184 149 197 203 167
## 5: 77 58 411 59 52 323 181 143 197 135 151
## ---
## 19996: 170 106 496 143 98 434 278 158 268 289 162
## 19997: 125 100 422 80 62 444 190 153 196 203 158
## 19998: 68 54 450 200 135 477 225 419 605 311 164
## 19999: 109 85 417 90 72 379 240 163 391 275 152
## 20000: 587 279 782 421 170 724 210 144 335 402 223
## CD271 Comp-UV Blue L_D-A Time cell_type FileName
## 1: 168 252 683 Secretory Round_7_Pneumacult_2
## 2: 287 350 9 Ciliated Round_7_Pneumacult_2
## 3: 648 211 160 Basal Round_7_Pneumacult_Cal_4
## 4: 340 200 83 CD66c- other Round_7_Pneumacult_3
## 5: 481 199 898 CD66c- other Round_7_Pneumacult_2
## ---
## 19996: 149 213 430 Secretory Round_7_Pneumacult_2
## 19997: 662 213 646 Basal Round_7_Pneumacult_Cal_5
## 19998: 209 197 712 Secretory Round_7_Pneumacult_Cal_6
## 19999: 654 236 703 Basal Round_7_Pneumacult_Cal_4
## 20000: 146 341 949 Secretory Round_7_Pneumacult_1
## FileNo sample round treatment condition FlowSOM_cluster
## 1: 2 2_PC 7 PC uninfected 47
## 2: 2 2_PC 7 PC uninfected 17
## 3: 4 4_PC 7 PC infected 152
## 4: 3 3_PC 7 PC uninfected 172
## 5: 2 2_PC 7 PC uninfected 189
## ---
## 19996: 2 2_PC 7 PC uninfected 101
## 19997: 5 5_PC 7 PC infected 195
## 19998: 6 6_PC 7 PC infected 115
## 19999: 4 4_PC 7 PC infected 153
## 20000: 1 1_PC 7 PC uninfected 62
## FlowSOM_metacluster
## 1: 5
## 2: 2
## 3: 8
## 4: 5
## 5: 5
## ---
## 19996: 5
## 19997: 8
## 19998: 5
## 19999: 8
## 20000: 5
#Create UMAP
cell.sub <- run.umap(cell.sub, cluster.cols, neighbours = 50)
## Loading required package: umap
## [2023-08-21 15:55:13] starting umap
## [2023-08-21 15:55:13] creating graph of nearest neighbors
## [2023-08-21 15:57:54] creating initial embedding
## [2023-08-21 15:58:01] optimizing embedding
## [2023-08-21 15:59:23] done
#A little data wrangling for graphing
cell.sub$cell_type <- factor(cell.sub$cell_type, levels = c("Ciliated", "Secretory", "Basal", "Triple negative", "CD66c+ other", "CD66c- other", "Goblet", "CD271+ ciliated", "CD66c+ basal"))
cell.sub$orderrank_np <- rank(cell.sub$`Cal NP`,ties.method="first")
cell.sub$orderrank_ha <- rank(cell.sub$`Cal HA`,ties.method="first")
cell.sub$condition <- factor(cell.sub$condition, levels = c("uninfected", "infected"))
cell.sub$FlowSOM_metacluster <- factor(cell.sub$FlowSOM_metacluster)
#Make graph of UMAP colored by populations assigned via manual gating in FlowJo
ggplot(data = cell.sub,
aes(x = UMAP_X, y = UMAP_Y, color = cell_type)) +
geom_point(size = 0.5) +
facet_grid(. ~ condition) +
scale_color_manual(values = c("#1080FF", "#8000FF", "#FB02FF", "#FD8008", "#FECC66", "#FC6FCF", "#FFFF0A", "black", "#108080")) +
ggtitle("Round 7 Pneumacult UMAP\nFlow phenotyping by cell type") +
theme_bw() +
theme(panel.grid.major = element_blank(),
panel.grid.minor = element_blank()) +
coord_fixed()
ggplot(data = cell.sub,
aes(x = UMAP_X, y = UMAP_Y, color = `SiR-Tub`)) +
geom_point(alpha = 0.4) +
scale_color_distiller(palette = "Spectral")+
facet_grid(. ~ condition) +
ggtitle("Round 7 Pneumacult UMAP\nFlow phenotyping by SiR-Tub staining") +
theme_bw() +
theme(panel.grid.major = element_blank(),
panel.grid.minor = element_blank()) +
coord_fixed()
ggplot(data = cell.sub,
aes(x = UMAP_X, y = UMAP_Y, color = `CD66c`)) +
geom_point(alpha = 0.4) +
scale_color_distiller(palette = "Spectral") +
facet_grid(. ~ condition) +
ggtitle("Round 7 Pneumacult UMAP\nFlow phenotyping by CD66c staining") +
theme_bw() +
theme(panel.grid.major = element_blank(),
panel.grid.minor = element_blank()) +
coord_fixed()
ggplot(data = cell.sub,
aes(x = UMAP_X, y = UMAP_Y, color = `TSPAN8`)) +
geom_point(alpha = 0.4) +
scale_color_distiller(palette = "Spectral") +
facet_grid(.~condition) +
ggtitle("Round 7 Pneumacult UMAP\nFlow phenotyping by TSPAN8 staining") +
theme_bw() +
theme(panel.grid.major = element_blank(),
panel.grid.minor = element_blank()) +
coord_fixed()
ggplot(data = cell.sub,
aes(x = UMAP_X, y = UMAP_Y, color = `CD271`)) +
geom_point(alpha = 0.4) +
scale_color_distiller(palette = "Spectral") +
facet_grid(.~condition) +
ggtitle("Round 7 Pneumacult UMAP\nFlow phenotyping by CD271 staining") +
theme_bw() +
theme(panel.grid.major = element_blank(),
panel.grid.minor = element_blank()) +
coord_fixed()
Shows heatmap comparing changes in cell type frequencies each week during differentiation. The timecourse experiments were performed from two independent differentiations (round 6 and 7) of NHBE cells, and one independent differentiation of NHNE cells. The exact timepoints/cells included were: * NHBE differentiation, phenotyped at weeks 0, 1, 2, and 3 * NHBE differentiation, phenotyped at weeks 0, 1, and 3 * NHNE differentiation, phenotyped at weeks 0, 1, 2, and 3
Data in paper is from the “round 6” NHBE differentiation, which is one of two experiments where a timecourse was performed. But all sets of timecourse data are included/graphed below.
uninfected_dat %>%
filter(round %in% c("6", "7"), #Filter down to just the timecourse data from rounds 6 and 7
condition == "pneumacult",
!(round == "7" & cell_origin == "nhne")) %>%
droplevels() %>%
group_by(round, cell_origin, cell_type) %>%
mutate(fold_change_vs_wk0 = avg_freq_of_alive/avg_freq_of_alive[match('0', week)], #Calculate change in frequency compared to week 0 for each cell type
log_fold_change_vs_wk0 = log10(fold_change_vs_wk0)) %>% #Log transform
filter(week != 0) %>% #Filter week 0 since it is zero log fold change
ggplot(aes(x = week,
y = fct_rev(cell_type),
fill = log_fold_change_vs_wk0)) +
geom_tile(color = "black") +
scale_fill_gradient2(name = "Log10 fold change in frequency\nvs Week 0",
high="red", mid = "white", low = "#333333") +
facet_wrap(round~cell_origin, ncol = 2) +
theme_bw() +
ggtitle("Cell type frequency changes over time\n(log10 fold change vs Week 0)")+
labs(x = "Week",
y = "Cell type") +
theme(plot.title = element_text(size = 12),
panel.grid.major = element_blank(),
axis.text.x = element_text(angle = 45, hjust = 1),
axis.text = element_text(size = 10))
Shows a box and whiskers plot of Shannon’s alpha diversity indices calculated over time in the dataset.
#Alpha values are calculated here for infected AND uninfected data. Figure 1 only includes alpha data from uninfected replicates. Infected alpha values are depicted in Figure 2.
#Manipulate uninfected phenotyping df so that sample IDs are column names and row names are the cell type
count_dat <- phenotyping_dat %>%
ungroup() %>%
select(sample_id,cell_type,avg_freq_of_alive) %>%
tidyr::spread(sample_id, avg_freq_of_alive)
#Calculate simpson alpha index for each sample from the above manipulated dataframe
alpha <- vegan::diversity(count_dat[,-1], MARGIN = 2, index = "simpson")
alpha <- as.data.frame(alpha)
#Create new df that contains alpha information for each replicate
colnames(alpha) <- "alpha"
alpha <- merge(phenotyping_dat %>%
select(sample_id, round, week, moi, infection, cell_origin, condition) %>%
distinct(., sample_id, round, week, moi, infection, cell_origin, condition) %>%
tibble::column_to_rownames('sample_id'),
alpha,
by = "row.names")
#Function to position annotations above bar plots
give.n <- function(x){
return(c(y = median(x)*1.2, label = length(x)))
# experiment with the multiplier to find the perfect position
}
#Create plot of just the uninfected data
alpha %>%
filter(infection == "uninfected" & moi == 0.5) %>% #Filter infected data from df
ggplot(aes(x = week, y = alpha)) +
geom_boxplot()+
ggtitle("Simpson's alpha diversity index by week\nRounds 3-9, Week 3") +
stat_summary(fun.data = give.n, geom = "text", position = position_dodge(width = 0.75), fun.y = median) +
scale_y_continuous(limits = c(0.3,1), expand = c(0,0)) +
theme(panel.grid = element_blank(),
panel.background = element_blank(),
axis.line = element_line(colour = "black")) +
labs(x = "Weeks post-ALI",
y = "Simpson's alpha diversity index",
caption = "Numbers above boxes = # replicates")
## Warning: The `fun.y` argument of `stat_summary()` is deprecated as of ggplot2 3.3.0.
## ℹ Please use the `fun` argument instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
Shows heatmap comparing changes in cell type frequencies among the different treatment conditions within an independent differentiation (“round”). The heatmaps show relative changes in cell type frequencies versus a baseline of the Pneumacult treatment group. All rounds are graphed here, but only round 7 data is shown in Figure 1E.
First a comparison of all rounds. The NHBE and NHNE cells are graphed separately.
#All rounds
uninfected_dat %>%
filter(week == "3") %>%
group_by(round, cell_origin, cell_type) %>%
mutate(fold_change_vs_pc = avg_freq_of_alive/avg_freq_of_alive[match('pneumacult', condition)]) %>%
mutate(condition = factor(condition, levels = c("pneumacult", "il13_low", "il13_high", "dapt", "lonza", "ifn")),
round_cell_origin = paste(round, cell_origin, sep = "_"),
log_fold_change_vs_pc = log10(fold_change_vs_pc)) %>%
ggplot(aes(x = condition,
y = fct_rev(cell_type),
fill = log_fold_change_vs_pc)) +
geom_tile(color = "black") +
scale_fill_gradient2(name = "Log10 fold change in frequency\nvs Pneumacult",
high="red", mid = "white", low = "#333333") +
facet_grid(round~cell_origin) +
theme_bw() +
ggtitle("Cell type frequency changes after treatments\nWeek 3 (log10 fold change vs Pneumacult)")+
labs(y = "Cell Type",
x = "Condition") +
theme(plot.title = element_text(size = 12),
panel.grid.major = element_blank(),
axis.text.x = element_text(angle = 45, hjust = 1),
axis.text = element_text(size = 10))
#Round 7 only, remove the NHBE Pneumacult column because it is all zero (it is log10 fold change vs itself)
#Also include the Pneumacult NHNE group, instead of graphing NHBE and NHNE separately
uninfected_dat %>%
filter(week == "3" & round == "7") %>%
mutate(condition_origin = paste0(condition, "-", cell_origin),
condition_origin = factor(condition_origin, levels = c("pneumacult-nhbe", "il13_low-nhbe", "il13_high-nhbe", "dapt-nhbe","ifn-nhbe", "lonza-nhbe", "pneumacult-nhne", "il13_low-nhne", "il13_high-nhne", "dapt-nhne"))) %>%
group_by(cell_type) %>%
mutate(fold_change_vs_pc = avg_freq_of_alive/avg_freq_of_alive[match('pneumacult-nhbe', condition_origin)]) %>%
mutate(log_fold_change_vs_pc = log10(fold_change_vs_pc)) %>%
filter(condition_origin != "pneumacult-nhbe") %>%
ggplot(aes(x = condition_origin,
y = fct_rev(cell_type),
fill = log_fold_change_vs_pc)) +
geom_tile(color = "black") +
scale_fill_gradient2(limits = c(-3,2), name = "Log10 fold change in frequency\nvs Pneumacult",
high="red", mid = "white", low = "#333333") +
theme_bw() +
ggtitle("Cell type frequency changes after treatments\nWeek 3 (log10 fold change vs Pneumacult)")+
labs(y = "Cell Type",
x = "Condition") +
theme(plot.title = element_text(size = 12),
panel.grid.major = element_blank(),
axis.text.x = element_text(angle = 45, hjust = 1),
axis.text = element_text(size = 10))
Shows a box and whiskers plot of Shannon’s alpha diversity indices calculated for all the week 3 data, combining rounds and conditions.
Since only a single replicate of NHNE differentiations at conditions other than Pneumacult was performed, only NHBE Pneumacult treated conditions are included.
#first, do ANOVA to compare treatment group alpha diversities preinfection
#Do ANOVA to look for differences
anova_result <- aov(alpha ~ condition_origin,
alpha %>%
filter(infection == "uninfected" & week == "3") %>%
mutate(condition_origin = paste0(condition, "-", cell_origin),
condition_origin = factor(condition_origin, levels = c("pneumacult-nhbe", "il13_low-nhbe", "il13_high-nhbe", "dapt-nhbe","ifn-nhbe", "lonza-nhbe", "pneumacult-nhne", "il13_low-nhne", "il13_high-nhne", "dapt-nhne"))) %>%
filter(condition_origin %in% c("pneumacult-nhbe", "il13_low-nhbe", "il13_high-nhbe", "dapt-nhbe","ifn-nhbe", "lonza-nhbe", "pneumacult-nhne")))
summary(anova_result)
## Df Sum Sq Mean Sq F value Pr(>F)
## condition_origin 5 0.245 0.04900 7.681 0.000306 ***
## Residuals 21 0.134 0.00638
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#Tukey posthoc test
tukey_result <- HSD.test(anova_result, "condition_origin", group = TRUE)
print(tukey_result)
## $statistics
## MSerror Df Mean CV
## 0.006378581 21 0.7055277 11.32004
##
## $parameters
## test name.t ntr StudentizedRange alpha
## Tukey condition_origin 6 4.424353 0.05
##
## $means
## alpha std r Min Max Q25 Q50
## dapt-nhbe 0.4401954 0.08435288 3 0.3810741 0.5367927 0.3918968 0.4027194
## il13_high-nhbe 0.7456857 0.10254686 6 0.5503647 0.8478375 0.7451601 0.7692444
## il13_low-nhbe 0.7540470 0.06342009 6 0.6461721 0.8017064 0.7256237 0.7857632
## lonza-nhbe 0.7004960 0.02830200 3 0.6683451 0.7216454 0.6899212 0.7114973
## pneumacult-nhbe 0.7428324 0.08633512 7 0.5578255 0.8189732 0.7457429 0.7668465
## pneumacult-nhne 0.7144745 0.02655215 2 0.6956993 0.7332497 0.7050869 0.7144745
## Q75
## dapt-nhbe 0.4697561
## il13_high-nhbe 0.7925297
## il13_low-nhbe 0.7959157
## lonza-nhbe 0.7165714
## pneumacult-nhbe 0.7823480
## pneumacult-nhne 0.7238621
##
## $comparison
## NULL
##
## $groups
## alpha groups
## il13_low-nhbe 0.7540470 a
## il13_high-nhbe 0.7456857 a
## pneumacult-nhbe 0.7428324 a
## pneumacult-nhne 0.7144745 a
## lonza-nhbe 0.7004960 a
## dapt-nhbe 0.4401954 b
##
## attr(,"class")
## [1] "group"
tukey_groups <- tukey_result$groups[order(rownames(tukey_result$groups)),]
#Great graph with the tukey groups annotated
alpha %>%
filter(infection == "uninfected" & week == "3") %>%
mutate(condition_origin = paste0(condition, "-", cell_origin),
condition_origin = factor(condition_origin, levels = c("pneumacult-nhbe", "il13_low-nhbe", "il13_high-nhbe", "dapt-nhbe", "lonza-nhbe", "pneumacult-nhne", "il13_low-nhne", "il13_high-nhne", "dapt-nhne"))) %>%
filter(condition_origin %in% c("pneumacult-nhbe", "il13_low-nhbe", "il13_high-nhbe", "dapt-nhbe","ifn-nhbe", "lonza-nhbe", "pneumacult-nhne")) %>%
ggplot(aes(x=condition_origin,
y = alpha,
color = condition_origin)) +
geom_boxplot() +
geom_text(data = data.frame(),
aes(x = rownames(tukey_groups), y = max(alpha$alpha) + .1, label = tukey_groups$groups),
col = 'black',
size = 10) +
scale_color_manual(values = c("#70C1B3", "#004F2D", "#FFE066", "#247BA0", "#999999", "#70C1B3")) +
stat_summary(fun.data = give.n, geom = "text", position = position_dodge(width = 0.75), fun.y = median) +
ggtitle("Simpson's alpha diversity index-by condition\nRounds 3-9, Week 3") +
labs(x = "Condition",
y = "Simpson's alpha diversity index",
caption = "Letters indicate groups determined by Tukey posthoc test\nNumbers indicate # of replicates") +
theme(panel.grid = element_blank(),
panel.background = element_blank(),
axis.line = element_line(colour = "black"),
axis.text.x = element_text(angle = 45, hjust = 1),
legend.position = "none")
Did an ANOVA to look for differences in alpha diversities among the treatment groups. Found that none of the alpha diversities across treatment groups were statistically significant from each other, except for NHBE cells treated with Pneumacult + DAPT.
Shows PCA of uninfected phenotyping data. PCA was performed on the uninfected cell type frequencies and colored by treatment and cell origin (NHBE or NHNE).
#Get the uninfected week 3 data in own dataframe
uninfected_pca_wk3_dat <- uninfected_dat %>%
filter(week == 3) %>%
ungroup() %>%
select(sample_id, condition, cell_type, cell_origin, avg_freq_of_alive) %>%
tidyr::spread(cell_type, avg_freq_of_alive) %>%
droplevels() %>%
mutate(condition = factor(condition, levels = c("pneumacult", "il13_low", "il13_high", "dapt", "lonza"))) %>%
tibble::column_to_rownames('sample_id')
#Run principle components analysis on a matrix of the above data
preinfection_pca_wk3 <- prcomp(as.matrix(uninfected_pca_wk3_dat[3:11]), scale = TRUE)
#Plot the PCA biplot
preinfection_pca_plot_wk3 <- fviz_pca_biplot(preinfection_pca_wk3,
mean.point = FALSE,
label = "var",
title = "NH(B/N)E Preinfection phenotyping PCA-\nWeek 3 only")+
geom_point(size = 3,aes(shape = uninfected_pca_wk3_dat$cell_origin, color = uninfected_pca_wk3_dat$condition)) +
scale_color_manual(values = c("#70C1B3", "#004F2D", "#FFE066", "#247BA0", "#999999"))+
guides(shape = guide_legend(title = "Cell origin"),
colour = guide_legend(title = "Condition"))
preinfection_pca_plot_wk3
#Create plots of active variable contribution to the first two principal components
fviz_contrib(preinfection_pca_wk3, choice="var", axes = 1)
fviz_contrib(preinfection_pca_wk3, choice="var", axes = 2)
MDS plot showing similarity in uninfected cell type frequencies between donors. 5 different donors were used. This MDS plot compares NHBE Pneumacult treatments only, since this treatment was performed for each round.
Donor information: * Rounds 3 and 4: Donor A * Round 5: Donor B * Rounds 6 and 7: Donor C * Round 8: Donor D * Round 9: Donor E
#Dataframe of donor info
donor_info <- data.frame(round = c("3", "4", "5", "6", "7", "8", "9"),
donor = c("A", "A", "B", "C", "C", "D", "E"))
#Calculate Bray-Curtis index for dissimilarity
nhbe_pneu_beta_dist <- vegdist(t(uninfected_dat %>%
filter(cell_origin == "nhbe",
moi == 0.5,
condition == "pneumacult",
week == "3") %>%
ungroup() %>%
select(sample_id, cell_type, avg_freq_of_alive) %>%
tidyr::spread(sample_id, avg_freq_of_alive) %>%
.[,-1]), index = "bray")
#Feed into an MDS function:
nhbe_pneu_mds <- metaMDS(nhbe_pneu_beta_dist)
## Run 0 stress 0.003160624
## Run 1 stress 0.003210843
## ... Procrustes: rmse 0.003670819 max resid 0.004473532
## ... Similar to previous best
## Run 2 stress 0.003243644
## ... Procrustes: rmse 0.006145051 max resid 0.007536311
## Run 3 stress 0.003250694
## ... Procrustes: rmse 0.006688302 max resid 0.008215421
## Run 4 stress 0.08722371
## Run 5 stress 0.1406759
## Run 6 stress 0.05931879
## Run 7 stress 0.0593188
## Run 8 stress 0.08722371
## Run 9 stress 0.003235935
## ... Procrustes: rmse 0.0055554 max resid 0.00680137
## Run 10 stress 0.1406759
## Run 11 stress 0.003160697
## ... Procrustes: rmse 3.872058e-05 max resid 7.12503e-05
## ... Similar to previous best
## Run 12 stress 0.05931883
## Run 13 stress 0.003160621
## ... New best solution
## ... Procrustes: rmse 4.306163e-06 max resid 8.108033e-06
## ... Similar to previous best
## Run 14 stress 0.003256923
## ... Procrustes: rmse 0.007055628 max resid 0.008668112
## Run 15 stress 0.1406759
## Run 16 stress 0.003244011
## ... Procrustes: rmse 0.006152001 max resid 0.007541593
## Run 17 stress 0.003247572
## ... Procrustes: rmse 0.006408561 max resid 0.007867237
## Run 18 stress 0.003160689
## ... Procrustes: rmse 1.272649e-05 max resid 1.76575e-05
## ... Similar to previous best
## Run 19 stress 0.1406759
## Run 20 stress 0.003160659
## ... Procrustes: rmse 2.698592e-05 max resid 5.087215e-05
## ... Similar to previous best
## *** Best solution repeated 3 times
nhbe_pneu_mds_data <- as.data.frame(nhbe_pneu_mds$points)
nhbe_pneu_graph_data <- merge(nhbe_pneu_mds_data,
alpha %>%
filter(infection == "uninfected") %>%
tibble::column_to_rownames('Row.names'),
by = "row.names") %>%
mutate(round = factor(round),
week = factor(week)) %>%
merge(., donor_info, by = "round")
ggplot(nhbe_pneu_graph_data, aes(x = MDS1, y = MDS2, color = donor)) +
geom_point(size = 3) +
scale_shape_manual(values = c(3, 19, 17, 15)) +
ggtitle("Preinfection phenotyping cell type diversity MDS-\nNHBE pneumacult, week 3 only") +
theme_bw() +
theme(panel.grid = element_blank()) +
labs(color = "Donor")
Shows UMAP of flow cytometry data of infected cells. Flow cytometry channel value data from round 7 Pneumacult treated cells was used to generate UMAPs displaying the manually gated populations and marker staining. The UMAP is a combination of uninfected cells (n=3 wells) and infected cells (n=3 wells). Figure 2A is generated using UMAP colored by HA or NP staining, but only the infected UMAP with staining is displayed.
ggplot(data = cell.sub,
aes(x = UMAP_X, y = UMAP_Y, color = `Cal NP`, order = orderrank_np)) +
geom_point() +
#scale_color_gradientn(colors = c("white","#C6DBEF", "#4292C6", "#08306B")) +
scale_color_distiller(palette = "RdYlBu", direction = -1) +
facet_grid(.~condition) +
ggtitle("Round 7 Pneumacult UMAP\nFlow phenotyping by Cal NP staining") +
theme_bw() +
theme(panel.grid.major = element_blank(),
panel.grid.minor = element_blank()) +
coord_fixed()
ggplot(data = cell.sub,
aes(x = UMAP_X, y = UMAP_Y, color = `Cal HA`, order = (orderrank_ha))) +
geom_point() +
scale_color_distiller(palette = "RdYlBu", direction = -1) +
facet_grid(.~condition) +
ggtitle("Round 7 Pneumacult UMAP\nFlow phenotyping by Cal HA staining") +
theme_bw() +
theme(panel.grid.major = element_blank(),
panel.grid.minor = element_blank()) +
coord_fixed()
Shows alluvial plots of pre and post infection phenotyping data, as well as the overall percentage of alive cells that are IAV+.
This dataset is generated from the average cell type frequencies and coerced into alluvial format. The code below will generate an alluvial plot for each of the rounds/conditions that were infected with Cal/07/09. This includes:
Cells differentiated for 3 weeks: * Pneumacult (PC) treated NHBE cells, infected at MOI=0.5 (rounds 3, 4, 7, 9) * PC + IL-13 low treated NHBE cells, infected at MOI=0.5 (rounds 3, 4, 7, 9) * PC + IL-13 high treated NHBE cells, infected at MOI=0.5 (rounds 3, 4, 7, 9) * PC + DAPT treated NHBE cells, infected at MOI=0.5 (rounds 7 and 9) * Lonza treated NHBE cells, infected at MOI=0.5 (rounds 7 and 9) * PC treated NHNE cells, infected at MOI=0.5 (round 7 only) * PC treated NHBE cells, infected at MOI=1 (round 3 only)
Cells differentiated for 0 weeks: * PC treated NHBE cells, infected at MOI=0.5 (round 7 only)
alluvial_dat <- read_excel("../data/nhbe_alluvial_plot_data.xlsx", range = cell_cols("A:G"), na = "NA") %>%
mutate(cell_type = factor(cell_type, levels = c("Ciliated", "Secretory", "Basal", "Triple Negative", "CD66c+ Other", "CD66c- Other", "Goblet", "CD271+ Ciliated", "CD66c+ Basal")),
stratum = factor(stratum, levels = c("Ciliated", "Secretory", "Basal", "Triple Negative", "CD66c+ Other", "CD66c- Other", "Goblet", "CD271+ Ciliated", "CD66c+ Basal", "negative", "positive")),
measurement = as.factor(measurement),
measurement = fct_recode(measurement, "Preinfection" = "preinfection_frequency", "Postinfection" = "postinfection_frequency"),
measurement = factor(measurement, levels = c("Preinfection", "Postinfection", "IAV+")),
experiment = as.factor(experiment))
#Function to make alluvial plots for each of the sets of data
make_alluvial_plot <- function(exp, MOI) {
print(ggplot(data = alluvial_dat %>%
filter(experiment==exp & moi==MOI),
aes(x = measurement,
y=freq,
stratum = stratum,
alluvium = alluvium,
label = stratum)) +
geom_alluvium(aes(fill = stratum),color = "gray", na.rm=TRUE) +
geom_stratum(aes(fill=stratum), na.rm=TRUE) +
scale_fill_manual(values = c("#1080FF", "#8000FF", "#FB02FF", "#FD8008", "#FECC66", "#FC6FCF", "#FFFF0A", "#FFFFFF", "#108080", "lightgray", "darkgray")) +
ggtitle(paste0("nHTBE ",exp," alluvial\nMOI=",MOI)) +
theme_bw() +
theme(panel.grid = element_blank()))
}
combos <- alluvial_dat %>%
distinct(experiment, moi)
for (x in (1:nrow(combos))) {
make_alluvial_plot(combos$experiment[x], combos$moi[x])
}
## Warning in setup_data(...): Some differentiation aesthetics vary within alluvia, and will be diffused by their first value.
## Consider using `geom_flow()` instead.
## Warning in setup_data(...): Some differentiation aesthetics vary within alluvia, and will be diffused by their first value.
## Consider using `geom_flow()` instead.
## Warning in setup_data(...): Some differentiation aesthetics vary within alluvia, and will be diffused by their first value.
## Consider using `geom_flow()` instead.
## Warning in setup_data(...): Some differentiation aesthetics vary within alluvia, and will be diffused by their first value.
## Consider using `geom_flow()` instead.
## Warning in setup_data(...): Some differentiation aesthetics vary within alluvia, and will be diffused by their first value.
## Consider using `geom_flow()` instead.
## Warning in setup_data(...): Some differentiation aesthetics vary within alluvia, and will be diffused by their first value.
## Consider using `geom_flow()` instead.
## Warning in setup_data(...): Some differentiation aesthetics vary within alluvia, and will be diffused by their first value.
## Consider using `geom_flow()` instead.
## Warning in setup_data(...): Some differentiation aesthetics vary within alluvia, and will be diffused by their first value.
## Consider using `geom_flow()` instead.
## Warning in setup_data(...): Some differentiation aesthetics vary within alluvia, and will be diffused by their first value.
## Consider using `geom_flow()` instead.
## Warning in setup_data(...): Some differentiation aesthetics vary within alluvia, and will be diffused by their first value.
## Consider using `geom_flow()` instead.
## Warning in setup_data(...): Some differentiation aesthetics vary within alluvia, and will be diffused by their first value.
## Consider using `geom_flow()` instead.
## Warning in setup_data(...): Some differentiation aesthetics vary within alluvia, and will be diffused by their first value.
## Consider using `geom_flow()` instead.
## Warning in setup_data(...): Some differentiation aesthetics vary within alluvia, and will be diffused by their first value.
## Consider using `geom_flow()` instead.
## Warning in setup_data(...): Some differentiation aesthetics vary within alluvia, and will be diffused by their first value.
## Consider using `geom_flow()` instead.
## Warning in setup_data(...): Some differentiation aesthetics vary within alluvia, and will be diffused by their first value.
## Consider using `geom_flow()` instead.
## Warning in setup_data(...): Some differentiation aesthetics vary within alluvia, and will be diffused by their first value.
## Consider using `geom_flow()` instead.
## Warning in setup_data(...): Some differentiation aesthetics vary within alluvia, and will be diffused by their first value.
## Consider using `geom_flow()` instead.
## Warning in setup_data(...): Some differentiation aesthetics vary within alluvia, and will be diffused by their first value.
## Consider using `geom_flow()` instead.
## Warning in setup_data(...): Some differentiation aesthetics vary within alluvia, and will be diffused by their first value.
## Consider using `geom_flow()` instead.
Shows bar plot of cell types present within IAV+ cells, comparing MOI of 0.5 to 1 of Pneumacult treated cells.
The phenotyping data for MOI=0.5 is a combination of w wells (replicates), while the MOI=1 had a single well.
phenotyping_dat %>%
filter(round == "3" & infection == "infected" & condition == "pneumacult") %>%
mutate(moi = factor(moi, levels = c("0.5", "1"))) %>%
droplevels() %>%
ggplot(aes(x = moi, y = avg_freq_of_alive, fill = cell_type)) +
geom_bar(stat = "identity", color = "black") +
geom_errorbar(aes(ymin = SDpos-sd, ymax = SDpos+sd), width = 0.2, position = "identity") +
scale_fill_manual(values = colors) +
scale_y_continuous(limits = c(0,100), expand = c(0,0)) +
ggtitle("NHBE post-infection IAV+ cells\nRound 3") +
labs(x = "MOI", y = "% of IAV+ cells") +
theme_bw() +
theme(axis.text.x = element_text(angle = 45, hjust = 1),
panel.grid = element_blank())
Graph files available on request (generated in Prism). Code for running the ANOVA with Tukey HSD posthoc test is listed below using log gMFI values from a differentiation of NHBE cells (N=3 wells) and 2 wells of A549 cells.
sialic_dat <- read.csv("../data/sialic_acid_log_gmfi_values.csv", header = TRUE, stringsAsFactors = TRUE)
#ANOVA for alpha 2,3 sialic acid data
a2_3_anova <- aov(gMFI ~ cell_type, data = subset(sialic_dat, sialic_acid_type=="a2_3"))
summary(a2_3_anova)
## Df Sum Sq Mean Sq F value Pr(>F)
## cell_type 9 1.0667 0.1185 168.4 3.46e-16 ***
## Residuals 19 0.0134 0.0007
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#Tukey posthoc test
a2_3_tukey_result <- HSD.test(a2_3_anova, "cell_type", group = TRUE)
print(a2_3_tukey_result)
## $statistics
## MSerror Df Mean CV
## 0.0007039806 19 3.997466 0.6637363
##
## $parameters
## test name.t ntr StudentizedRange alpha
## Tukey cell_type 10 5.037459 0.05
##
## $means
## gMFI std r Min Max Q25 Q50
## A549 4.383900 0.026939196 2 4.364851 4.402949 4.374375 4.383900
## basal 3.937709 0.027658509 3 3.918869 3.969463 3.921832 3.924796
## CD271+ ciliated 4.326459 0.015521521 3 4.309524 4.340008 4.319684 4.329845
## CD66c- other 3.793874 0.003596293 3 3.789722 3.796019 3.792801 3.795880
## CD66c+ basal 4.076163 0.013379922 3 4.061867 4.088384 4.070053 4.078239
## CD66c+ other 3.864606 0.017060550 3 3.845098 3.876737 3.858540 3.871981
## ciliated 4.142304 0.011764348 3 4.130302 4.153815 4.136549 4.142796
## goblet 3.879580 0.062932970 3 3.808211 3.927114 3.855813 3.903416
## secretory 3.839061 0.013709037 3 3.826334 3.853577 3.831803 3.837273
## triple negative 3.859816 0.023433533 3 3.837146 3.883945 3.847752 3.858357
## Q75
## A549 4.393424
## basal 3.947129
## CD271+ ciliated 4.334926
## CD66c- other 3.795949
## CD66c+ basal 4.083312
## CD66c+ other 3.874359
## ciliated 4.148305
## goblet 3.915265
## secretory 3.845425
## triple negative 3.871151
##
## $comparison
## NULL
##
## $groups
## gMFI groups
## A549 4.383900 a
## CD271+ ciliated 4.326459 a
## ciliated 4.142304 b
## CD66c+ basal 4.076163 b
## basal 3.937709 c
## goblet 3.879580 cd
## CD66c+ other 3.864606 cde
## triple negative 3.859816 de
## secretory 3.839061 de
## CD66c- other 3.793874 e
##
## attr(,"class")
## [1] "group"
#ANOVA for alpha 2,6 sialic acid data
a2_6_anova <- aov(gMFI ~ cell_type, data = subset(sialic_dat, sialic_acid_type=="a2_6"))
summary(a2_6_anova)
## Df Sum Sq Mean Sq F value Pr(>F)
## cell_type 9 1.7412 0.19347 50.77 2.14e-11 ***
## Residuals 19 0.0724 0.00381
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#Tukey posthoc test
a2_6_tukey_result <- HSD.test(a2_6_anova, "cell_type", group = TRUE)
print(a2_6_tukey_result)
## $statistics
## MSerror Df Mean CV
## 0.003810421 19 4.572376 1.350033
##
## $parameters
## test name.t ntr StudentizedRange alpha
## Tukey cell_type 10 5.037459 0.05
##
## $means
## gMFI std r Min Max Q25 Q50
## A549 4.425764 0.003306624 2 4.423426 4.428102 4.424595 4.425764
## basal 4.486685 0.058440877 3 4.428944 4.545802 4.457127 4.485310
## CD271+ ciliated 5.050441 0.038302416 3 5.006286 5.074721 5.038301 5.070315
## CD66c- other 4.305451 0.045401364 3 4.262261 4.352780 4.281787 4.301312
## CD66c+ basal 4.795549 0.063828195 3 4.728289 4.855277 4.765685 4.803081
## CD66c+ other 4.510580 0.059548293 3 4.449725 4.568730 4.481505 4.513284
## ciliated 4.792961 0.027776811 3 4.761048 4.811696 4.783594 4.806139
## goblet 4.691511 0.114444762 3 4.596960 4.818740 4.627896 4.658831
## secretory 4.344361 0.056128094 3 4.295611 4.405722 4.313681 4.331751
## triple negative 4.271588 0.067899070 3 4.221414 4.348850 4.232957 4.244500
## Q75
## A549 4.426933
## basal 4.515556
## CD271+ ciliated 5.072518
## CD66c- other 4.327046
## CD66c+ basal 4.829179
## CD66c+ other 4.541007
## ciliated 4.808917
## goblet 4.738786
## secretory 4.368737
## triple negative 4.296675
##
## $comparison
## NULL
##
## $groups
## gMFI groups
## CD271+ ciliated 5.050441 a
## CD66c+ basal 4.795549 b
## ciliated 4.792961 b
## goblet 4.691511 b
## CD66c+ other 4.510580 c
## basal 4.486685 c
## A549 4.425764 cd
## secretory 4.344361 cd
## CD66c- other 4.305451 d
## triple negative 4.271588 d
##
## attr(,"class")
## [1] "group"
Data and graph files available on request (generated in Prism).
Shows week 0 alluvial plot. A plot for this experiment is generated in the figure 2B code chunk, but is reproduced here as well.
ggplot(data = subset(alluvial_dat, experiment == "7_pneumacult_week0"),
aes(x = measurement,
y=freq,
stratum = stratum,
alluvium = alluvium,
label = stratum)) +
geom_alluvium(aes(fill = stratum),color = "gray", na.rm=TRUE) +
geom_stratum(aes(fill=stratum), na.rm=TRUE) +
scale_fill_manual(values = c("#1080FF", "#8000FF", "#FB02FF", "#FD8008", "#FECC66", "#FC6FCF", "#FFFF0A", "#FFFFFF", "#108080", "lightgray", "darkgray")) +
ggtitle("Pneumacult week 0 post-ALI alluvial plot") +
theme_bw() +
theme(panel.grid = element_blank())
## Warning in setup_data(...): Some differentiation aesthetics vary within alluvia, and will be diffused by their first value.
## Consider using `geom_flow()` instead.
Data and graph files available upon request (generated in Prism)
Data and graph files available upon request (generated in Prism)
Violin plots of HA and NP fluorescence intensity (FI). Channel values of fluorescence intensity values for HA and NP stains were exported from FlowJo for each gated population. The code below will produce violin plots in Figure 3C.
Each violin combines events from three wells.
meta_3c <- read.csv("../data/figure_3c_metadata.csv", header = TRUE)
#Get file info, we need to not read in empty files
list.of.files <- file.info(meta_3c$path)
# get the size for each file
sizes <- file.info(meta_3c$path)$size
#List non-empty files
list.of.non.empty.files <- rownames(list.of.files)[which(sizes > 6)]
fi_dat <- readr::read_csv(list.of.non.empty.files, id = "path", col_names = FALSE) %>%
rename(fi = X1)
## Rows: 49391 Columns: 2
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (1): X1
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
fi_dat <- as.data.frame(fi_dat)
fi_dat <- left_join(fi_dat, meta_3c, by = "path") %>%
mutate(round = factor(round),
cell_origin = factor(cell_origin),
treatment = factor(treatment, levels = c("Pneumacult", "PC_IL13_low", "PC_IL13_high", "Lonza", "PC_DAPT")),
origin_treatment = paste0(cell_origin, "_", treatment),
origin_treatment = factor(origin_treatment, levels = c("nhbe_Pneumacult", "nhbe_PC_IL13_low", "nhbe_PC_IL13_high", "nhbe_PC_DAPT", "nhbe_Lonza", "nhne_Pneumacult")),
week == factor(week),
cell_type = factor(cell_type, levels = c("Ciliated", "Secretory", "Basal", "Triple Negative", "CD66c+ Other", "CD66c- Other", "Goblet", "CD271+ Ciliated", "CD66c+ Basal")),
HA_or_NP = factor(HA_or_NP),
logFI = log10(fi))
Graph in paper of round 7 data:
ggplot(fi_dat, aes(x = cell_type, y = logFI, color = cell_type)) +
geom_violin(scale = "count", draw_quantiles = c(0.25, 0.5, 0.75)) +
facet_grid(origin_treatment~HA_or_NP) +
scale_color_manual(values = colors) +
scale_fill_manual(values = colors) +
scale_y_continuous(limits = c(0,6)) +
ggtitle(paste0("Round 7 Week 3 log10 FI plot\nMOI 0.5")) +
labs(x = "Cell type",
y = "Log10 FI",
color = "Cell type") +
theme_bw() +
theme(axis.text.x = element_text(angle = 45, hjust = 1),
strip.text = element_text(size = 10))
## Warning: Groups with fewer than two data points have been dropped.
## Groups with fewer than two data points have been dropped.
## Groups with fewer than two data points have been dropped.
## Groups with fewer than two data points have been dropped.
## Groups with fewer than two data points have been dropped.
## Groups with fewer than two data points have been dropped.
## Groups with fewer than two data points have been dropped.
## Groups with fewer than two data points have been dropped.
## Groups with fewer than two data points have been dropped.
Mean FI values were compared within a treatment condition using an ANOVA and Tukey posthoc statistical test to see whether any cell types have higher virus burden within the infected cells. Differences in FI within a cell type between treatment conditions were not statistically compared.
#Function to perform anova. Requires FI dataframe, the round
run.anova <- function(df, round, week, origin_treatment, HA_or_NP) {
results <- aov(logFI ~ cell_type, data = subset(df, round == round & week == week & origin_treatment == origin_treatment & HA_or_NP == HA_or_NP))
print(paste0("ANOVA of round ", round,", week ", week, ", ", origin_treatment, " and ", HA_or_NP, " logFI between cell types"))
print(summary(results))
print(HSD.test(results, "cell_type", group = TRUE))
}
combos <- fi_dat %>%
distinct(round, week, HA_or_NP, origin_treatment)
for (x in (1:nrow(combos))) {
run.anova(fi_dat, combos$round[x], combos$week[x], combos$origin_treatment[x], combos$HA_or_NP[x])
}
## [1] "ANOVA of round 7, week 3, nhbe_Lonza and HA logFI between cell types"
## Df Sum Sq Mean Sq F value Pr(>F)
## cell_type 8 631 78.89 366.5 <2e-16 ***
## Residuals 49382 10631 0.22
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## $statistics
## MSerror Df Mean CV
## 0.2152815 49382 3.651051 12.70824
##
## $parameters
## test name.t ntr StudentizedRange alpha
## Tukey cell_type 9 4.386509 0.05
##
## $means
## logFI std r Min Max Q25 Q50
## Basal 3.338597 0.5385615 499 2.551993 4.704409 2.877252 3.176311
## CD271+ Ciliated 3.832046 0.3838757 5555 2.552025 4.900796 3.616941 3.858975
## CD66c- Other 3.721810 0.5003153 420 2.593093 5.077571 3.382227 3.801166
## CD66c+ Basal 3.737218 0.5572598 432 2.591476 4.852589 3.324435 3.816204
## CD66c+ Other 3.612335 0.5623695 3233 2.445936 5.048178 3.116385 3.674303
## Ciliated 3.687258 0.4280634 25634 2.370524 5.031748 3.438439 3.734646
## Goblet 3.502913 0.4375353 236 2.563614 4.862660 3.208079 3.576817
## Secretory 3.563014 0.5351201 11766 2.420612 5.009366 3.108390 3.582073
## Triple Negative 3.249685 0.4283957 1616 2.478223 4.744064 2.928697 3.220904
## Q75
## Basal 3.823677
## CD271+ Ciliated 4.096585
## CD66c- Other 4.096538
## CD66c+ Basal 4.174894
## CD66c+ Other 4.064042
## Ciliated 3.978415
## Goblet 3.752905
## Secretory 3.989544
## Triple Negative 3.522732
##
## $comparison
## NULL
##
## $groups
## logFI groups
## CD271+ Ciliated 3.832046 a
## CD66c+ Basal 3.737218 b
## CD66c- Other 3.721810 b
## Ciliated 3.687258 b
## CD66c+ Other 3.612335 c
## Secretory 3.563014 d
## Goblet 3.502913 d
## Basal 3.338597 e
## Triple Negative 3.249685 f
##
## attr(,"class")
## [1] "group"
## [1] "ANOVA of round 7, week 3, nhbe_PC_DAPT and HA logFI between cell types"
## Df Sum Sq Mean Sq F value Pr(>F)
## cell_type 8 631 78.89 366.5 <2e-16 ***
## Residuals 49382 10631 0.22
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## $statistics
## MSerror Df Mean CV
## 0.2152815 49382 3.651051 12.70824
##
## $parameters
## test name.t ntr StudentizedRange alpha
## Tukey cell_type 9 4.386509 0.05
##
## $means
## logFI std r Min Max Q25 Q50
## Basal 3.338597 0.5385615 499 2.551993 4.704409 2.877252 3.176311
## CD271+ Ciliated 3.832046 0.3838757 5555 2.552025 4.900796 3.616941 3.858975
## CD66c- Other 3.721810 0.5003153 420 2.593093 5.077571 3.382227 3.801166
## CD66c+ Basal 3.737218 0.5572598 432 2.591476 4.852589 3.324435 3.816204
## CD66c+ Other 3.612335 0.5623695 3233 2.445936 5.048178 3.116385 3.674303
## Ciliated 3.687258 0.4280634 25634 2.370524 5.031748 3.438439 3.734646
## Goblet 3.502913 0.4375353 236 2.563614 4.862660 3.208079 3.576817
## Secretory 3.563014 0.5351201 11766 2.420612 5.009366 3.108390 3.582073
## Triple Negative 3.249685 0.4283957 1616 2.478223 4.744064 2.928697 3.220904
## Q75
## Basal 3.823677
## CD271+ Ciliated 4.096585
## CD66c- Other 4.096538
## CD66c+ Basal 4.174894
## CD66c+ Other 4.064042
## Ciliated 3.978415
## Goblet 3.752905
## Secretory 3.989544
## Triple Negative 3.522732
##
## $comparison
## NULL
##
## $groups
## logFI groups
## CD271+ Ciliated 3.832046 a
## CD66c+ Basal 3.737218 b
## CD66c- Other 3.721810 b
## Ciliated 3.687258 b
## CD66c+ Other 3.612335 c
## Secretory 3.563014 d
## Goblet 3.502913 d
## Basal 3.338597 e
## Triple Negative 3.249685 f
##
## attr(,"class")
## [1] "group"
## [1] "ANOVA of round 7, week 3, nhbe_Pneumacult and HA logFI between cell types"
## Df Sum Sq Mean Sq F value Pr(>F)
## cell_type 8 631 78.89 366.5 <2e-16 ***
## Residuals 49382 10631 0.22
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## $statistics
## MSerror Df Mean CV
## 0.2152815 49382 3.651051 12.70824
##
## $parameters
## test name.t ntr StudentizedRange alpha
## Tukey cell_type 9 4.386509 0.05
##
## $means
## logFI std r Min Max Q25 Q50
## Basal 3.338597 0.5385615 499 2.551993 4.704409 2.877252 3.176311
## CD271+ Ciliated 3.832046 0.3838757 5555 2.552025 4.900796 3.616941 3.858975
## CD66c- Other 3.721810 0.5003153 420 2.593093 5.077571 3.382227 3.801166
## CD66c+ Basal 3.737218 0.5572598 432 2.591476 4.852589 3.324435 3.816204
## CD66c+ Other 3.612335 0.5623695 3233 2.445936 5.048178 3.116385 3.674303
## Ciliated 3.687258 0.4280634 25634 2.370524 5.031748 3.438439 3.734646
## Goblet 3.502913 0.4375353 236 2.563614 4.862660 3.208079 3.576817
## Secretory 3.563014 0.5351201 11766 2.420612 5.009366 3.108390 3.582073
## Triple Negative 3.249685 0.4283957 1616 2.478223 4.744064 2.928697 3.220904
## Q75
## Basal 3.823677
## CD271+ Ciliated 4.096585
## CD66c- Other 4.096538
## CD66c+ Basal 4.174894
## CD66c+ Other 4.064042
## Ciliated 3.978415
## Goblet 3.752905
## Secretory 3.989544
## Triple Negative 3.522732
##
## $comparison
## NULL
##
## $groups
## logFI groups
## CD271+ Ciliated 3.832046 a
## CD66c+ Basal 3.737218 b
## CD66c- Other 3.721810 b
## Ciliated 3.687258 b
## CD66c+ Other 3.612335 c
## Secretory 3.563014 d
## Goblet 3.502913 d
## Basal 3.338597 e
## Triple Negative 3.249685 f
##
## attr(,"class")
## [1] "group"
## [1] "ANOVA of round 7, week 3, nhbe_PC_IL13_low and HA logFI between cell types"
## Df Sum Sq Mean Sq F value Pr(>F)
## cell_type 8 631 78.89 366.5 <2e-16 ***
## Residuals 49382 10631 0.22
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## $statistics
## MSerror Df Mean CV
## 0.2152815 49382 3.651051 12.70824
##
## $parameters
## test name.t ntr StudentizedRange alpha
## Tukey cell_type 9 4.386509 0.05
##
## $means
## logFI std r Min Max Q25 Q50
## Basal 3.338597 0.5385615 499 2.551993 4.704409 2.877252 3.176311
## CD271+ Ciliated 3.832046 0.3838757 5555 2.552025 4.900796 3.616941 3.858975
## CD66c- Other 3.721810 0.5003153 420 2.593093 5.077571 3.382227 3.801166
## CD66c+ Basal 3.737218 0.5572598 432 2.591476 4.852589 3.324435 3.816204
## CD66c+ Other 3.612335 0.5623695 3233 2.445936 5.048178 3.116385 3.674303
## Ciliated 3.687258 0.4280634 25634 2.370524 5.031748 3.438439 3.734646
## Goblet 3.502913 0.4375353 236 2.563614 4.862660 3.208079 3.576817
## Secretory 3.563014 0.5351201 11766 2.420612 5.009366 3.108390 3.582073
## Triple Negative 3.249685 0.4283957 1616 2.478223 4.744064 2.928697 3.220904
## Q75
## Basal 3.823677
## CD271+ Ciliated 4.096585
## CD66c- Other 4.096538
## CD66c+ Basal 4.174894
## CD66c+ Other 4.064042
## Ciliated 3.978415
## Goblet 3.752905
## Secretory 3.989544
## Triple Negative 3.522732
##
## $comparison
## NULL
##
## $groups
## logFI groups
## CD271+ Ciliated 3.832046 a
## CD66c+ Basal 3.737218 b
## CD66c- Other 3.721810 b
## Ciliated 3.687258 b
## CD66c+ Other 3.612335 c
## Secretory 3.563014 d
## Goblet 3.502913 d
## Basal 3.338597 e
## Triple Negative 3.249685 f
##
## attr(,"class")
## [1] "group"
## [1] "ANOVA of round 7, week 3, nhbe_PC_IL13_high and HA logFI between cell types"
## Df Sum Sq Mean Sq F value Pr(>F)
## cell_type 8 631 78.89 366.5 <2e-16 ***
## Residuals 49382 10631 0.22
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## $statistics
## MSerror Df Mean CV
## 0.2152815 49382 3.651051 12.70824
##
## $parameters
## test name.t ntr StudentizedRange alpha
## Tukey cell_type 9 4.386509 0.05
##
## $means
## logFI std r Min Max Q25 Q50
## Basal 3.338597 0.5385615 499 2.551993 4.704409 2.877252 3.176311
## CD271+ Ciliated 3.832046 0.3838757 5555 2.552025 4.900796 3.616941 3.858975
## CD66c- Other 3.721810 0.5003153 420 2.593093 5.077571 3.382227 3.801166
## CD66c+ Basal 3.737218 0.5572598 432 2.591476 4.852589 3.324435 3.816204
## CD66c+ Other 3.612335 0.5623695 3233 2.445936 5.048178 3.116385 3.674303
## Ciliated 3.687258 0.4280634 25634 2.370524 5.031748 3.438439 3.734646
## Goblet 3.502913 0.4375353 236 2.563614 4.862660 3.208079 3.576817
## Secretory 3.563014 0.5351201 11766 2.420612 5.009366 3.108390 3.582073
## Triple Negative 3.249685 0.4283957 1616 2.478223 4.744064 2.928697 3.220904
## Q75
## Basal 3.823677
## CD271+ Ciliated 4.096585
## CD66c- Other 4.096538
## CD66c+ Basal 4.174894
## CD66c+ Other 4.064042
## Ciliated 3.978415
## Goblet 3.752905
## Secretory 3.989544
## Triple Negative 3.522732
##
## $comparison
## NULL
##
## $groups
## logFI groups
## CD271+ Ciliated 3.832046 a
## CD66c+ Basal 3.737218 b
## CD66c- Other 3.721810 b
## Ciliated 3.687258 b
## CD66c+ Other 3.612335 c
## Secretory 3.563014 d
## Goblet 3.502913 d
## Basal 3.338597 e
## Triple Negative 3.249685 f
##
## attr(,"class")
## [1] "group"
## [1] "ANOVA of round 7, week 3, nhbe_Lonza and NP logFI between cell types"
## Df Sum Sq Mean Sq F value Pr(>F)
## cell_type 8 631 78.89 366.5 <2e-16 ***
## Residuals 49382 10631 0.22
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## $statistics
## MSerror Df Mean CV
## 0.2152815 49382 3.651051 12.70824
##
## $parameters
## test name.t ntr StudentizedRange alpha
## Tukey cell_type 9 4.386509 0.05
##
## $means
## logFI std r Min Max Q25 Q50
## Basal 3.338597 0.5385615 499 2.551993 4.704409 2.877252 3.176311
## CD271+ Ciliated 3.832046 0.3838757 5555 2.552025 4.900796 3.616941 3.858975
## CD66c- Other 3.721810 0.5003153 420 2.593093 5.077571 3.382227 3.801166
## CD66c+ Basal 3.737218 0.5572598 432 2.591476 4.852589 3.324435 3.816204
## CD66c+ Other 3.612335 0.5623695 3233 2.445936 5.048178 3.116385 3.674303
## Ciliated 3.687258 0.4280634 25634 2.370524 5.031748 3.438439 3.734646
## Goblet 3.502913 0.4375353 236 2.563614 4.862660 3.208079 3.576817
## Secretory 3.563014 0.5351201 11766 2.420612 5.009366 3.108390 3.582073
## Triple Negative 3.249685 0.4283957 1616 2.478223 4.744064 2.928697 3.220904
## Q75
## Basal 3.823677
## CD271+ Ciliated 4.096585
## CD66c- Other 4.096538
## CD66c+ Basal 4.174894
## CD66c+ Other 4.064042
## Ciliated 3.978415
## Goblet 3.752905
## Secretory 3.989544
## Triple Negative 3.522732
##
## $comparison
## NULL
##
## $groups
## logFI groups
## CD271+ Ciliated 3.832046 a
## CD66c+ Basal 3.737218 b
## CD66c- Other 3.721810 b
## Ciliated 3.687258 b
## CD66c+ Other 3.612335 c
## Secretory 3.563014 d
## Goblet 3.502913 d
## Basal 3.338597 e
## Triple Negative 3.249685 f
##
## attr(,"class")
## [1] "group"
## [1] "ANOVA of round 7, week 3, nhbe_PC_IL13_low and NP logFI between cell types"
## Df Sum Sq Mean Sq F value Pr(>F)
## cell_type 8 631 78.89 366.5 <2e-16 ***
## Residuals 49382 10631 0.22
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## $statistics
## MSerror Df Mean CV
## 0.2152815 49382 3.651051 12.70824
##
## $parameters
## test name.t ntr StudentizedRange alpha
## Tukey cell_type 9 4.386509 0.05
##
## $means
## logFI std r Min Max Q25 Q50
## Basal 3.338597 0.5385615 499 2.551993 4.704409 2.877252 3.176311
## CD271+ Ciliated 3.832046 0.3838757 5555 2.552025 4.900796 3.616941 3.858975
## CD66c- Other 3.721810 0.5003153 420 2.593093 5.077571 3.382227 3.801166
## CD66c+ Basal 3.737218 0.5572598 432 2.591476 4.852589 3.324435 3.816204
## CD66c+ Other 3.612335 0.5623695 3233 2.445936 5.048178 3.116385 3.674303
## Ciliated 3.687258 0.4280634 25634 2.370524 5.031748 3.438439 3.734646
## Goblet 3.502913 0.4375353 236 2.563614 4.862660 3.208079 3.576817
## Secretory 3.563014 0.5351201 11766 2.420612 5.009366 3.108390 3.582073
## Triple Negative 3.249685 0.4283957 1616 2.478223 4.744064 2.928697 3.220904
## Q75
## Basal 3.823677
## CD271+ Ciliated 4.096585
## CD66c- Other 4.096538
## CD66c+ Basal 4.174894
## CD66c+ Other 4.064042
## Ciliated 3.978415
## Goblet 3.752905
## Secretory 3.989544
## Triple Negative 3.522732
##
## $comparison
## NULL
##
## $groups
## logFI groups
## CD271+ Ciliated 3.832046 a
## CD66c+ Basal 3.737218 b
## CD66c- Other 3.721810 b
## Ciliated 3.687258 b
## CD66c+ Other 3.612335 c
## Secretory 3.563014 d
## Goblet 3.502913 d
## Basal 3.338597 e
## Triple Negative 3.249685 f
##
## attr(,"class")
## [1] "group"
## [1] "ANOVA of round 7, week 3, nhbe_PC_IL13_high and NP logFI between cell types"
## Df Sum Sq Mean Sq F value Pr(>F)
## cell_type 8 631 78.89 366.5 <2e-16 ***
## Residuals 49382 10631 0.22
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## $statistics
## MSerror Df Mean CV
## 0.2152815 49382 3.651051 12.70824
##
## $parameters
## test name.t ntr StudentizedRange alpha
## Tukey cell_type 9 4.386509 0.05
##
## $means
## logFI std r Min Max Q25 Q50
## Basal 3.338597 0.5385615 499 2.551993 4.704409 2.877252 3.176311
## CD271+ Ciliated 3.832046 0.3838757 5555 2.552025 4.900796 3.616941 3.858975
## CD66c- Other 3.721810 0.5003153 420 2.593093 5.077571 3.382227 3.801166
## CD66c+ Basal 3.737218 0.5572598 432 2.591476 4.852589 3.324435 3.816204
## CD66c+ Other 3.612335 0.5623695 3233 2.445936 5.048178 3.116385 3.674303
## Ciliated 3.687258 0.4280634 25634 2.370524 5.031748 3.438439 3.734646
## Goblet 3.502913 0.4375353 236 2.563614 4.862660 3.208079 3.576817
## Secretory 3.563014 0.5351201 11766 2.420612 5.009366 3.108390 3.582073
## Triple Negative 3.249685 0.4283957 1616 2.478223 4.744064 2.928697 3.220904
## Q75
## Basal 3.823677
## CD271+ Ciliated 4.096585
## CD66c- Other 4.096538
## CD66c+ Basal 4.174894
## CD66c+ Other 4.064042
## Ciliated 3.978415
## Goblet 3.752905
## Secretory 3.989544
## Triple Negative 3.522732
##
## $comparison
## NULL
##
## $groups
## logFI groups
## CD271+ Ciliated 3.832046 a
## CD66c+ Basal 3.737218 b
## CD66c- Other 3.721810 b
## Ciliated 3.687258 b
## CD66c+ Other 3.612335 c
## Secretory 3.563014 d
## Goblet 3.502913 d
## Basal 3.338597 e
## Triple Negative 3.249685 f
##
## attr(,"class")
## [1] "group"
## [1] "ANOVA of round 7, week 3, nhbe_PC_DAPT and NP logFI between cell types"
## Df Sum Sq Mean Sq F value Pr(>F)
## cell_type 8 631 78.89 366.5 <2e-16 ***
## Residuals 49382 10631 0.22
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## $statistics
## MSerror Df Mean CV
## 0.2152815 49382 3.651051 12.70824
##
## $parameters
## test name.t ntr StudentizedRange alpha
## Tukey cell_type 9 4.386509 0.05
##
## $means
## logFI std r Min Max Q25 Q50
## Basal 3.338597 0.5385615 499 2.551993 4.704409 2.877252 3.176311
## CD271+ Ciliated 3.832046 0.3838757 5555 2.552025 4.900796 3.616941 3.858975
## CD66c- Other 3.721810 0.5003153 420 2.593093 5.077571 3.382227 3.801166
## CD66c+ Basal 3.737218 0.5572598 432 2.591476 4.852589 3.324435 3.816204
## CD66c+ Other 3.612335 0.5623695 3233 2.445936 5.048178 3.116385 3.674303
## Ciliated 3.687258 0.4280634 25634 2.370524 5.031748 3.438439 3.734646
## Goblet 3.502913 0.4375353 236 2.563614 4.862660 3.208079 3.576817
## Secretory 3.563014 0.5351201 11766 2.420612 5.009366 3.108390 3.582073
## Triple Negative 3.249685 0.4283957 1616 2.478223 4.744064 2.928697 3.220904
## Q75
## Basal 3.823677
## CD271+ Ciliated 4.096585
## CD66c- Other 4.096538
## CD66c+ Basal 4.174894
## CD66c+ Other 4.064042
## Ciliated 3.978415
## Goblet 3.752905
## Secretory 3.989544
## Triple Negative 3.522732
##
## $comparison
## NULL
##
## $groups
## logFI groups
## CD271+ Ciliated 3.832046 a
## CD66c+ Basal 3.737218 b
## CD66c- Other 3.721810 b
## Ciliated 3.687258 b
## CD66c+ Other 3.612335 c
## Secretory 3.563014 d
## Goblet 3.502913 d
## Basal 3.338597 e
## Triple Negative 3.249685 f
##
## attr(,"class")
## [1] "group"
## [1] "ANOVA of round 7, week 3, nhbe_Pneumacult and NP logFI between cell types"
## Df Sum Sq Mean Sq F value Pr(>F)
## cell_type 8 631 78.89 366.5 <2e-16 ***
## Residuals 49382 10631 0.22
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## $statistics
## MSerror Df Mean CV
## 0.2152815 49382 3.651051 12.70824
##
## $parameters
## test name.t ntr StudentizedRange alpha
## Tukey cell_type 9 4.386509 0.05
##
## $means
## logFI std r Min Max Q25 Q50
## Basal 3.338597 0.5385615 499 2.551993 4.704409 2.877252 3.176311
## CD271+ Ciliated 3.832046 0.3838757 5555 2.552025 4.900796 3.616941 3.858975
## CD66c- Other 3.721810 0.5003153 420 2.593093 5.077571 3.382227 3.801166
## CD66c+ Basal 3.737218 0.5572598 432 2.591476 4.852589 3.324435 3.816204
## CD66c+ Other 3.612335 0.5623695 3233 2.445936 5.048178 3.116385 3.674303
## Ciliated 3.687258 0.4280634 25634 2.370524 5.031748 3.438439 3.734646
## Goblet 3.502913 0.4375353 236 2.563614 4.862660 3.208079 3.576817
## Secretory 3.563014 0.5351201 11766 2.420612 5.009366 3.108390 3.582073
## Triple Negative 3.249685 0.4283957 1616 2.478223 4.744064 2.928697 3.220904
## Q75
## Basal 3.823677
## CD271+ Ciliated 4.096585
## CD66c- Other 4.096538
## CD66c+ Basal 4.174894
## CD66c+ Other 4.064042
## Ciliated 3.978415
## Goblet 3.752905
## Secretory 3.989544
## Triple Negative 3.522732
##
## $comparison
## NULL
##
## $groups
## logFI groups
## CD271+ Ciliated 3.832046 a
## CD66c+ Basal 3.737218 b
## CD66c- Other 3.721810 b
## Ciliated 3.687258 b
## CD66c+ Other 3.612335 c
## Secretory 3.563014 d
## Goblet 3.502913 d
## Basal 3.338597 e
## Triple Negative 3.249685 f
##
## attr(,"class")
## [1] "group"
## [1] "ANOVA of round 7, week 3, nhne_Pneumacult and HA logFI between cell types"
## Df Sum Sq Mean Sq F value Pr(>F)
## cell_type 8 631 78.89 366.5 <2e-16 ***
## Residuals 49382 10631 0.22
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## $statistics
## MSerror Df Mean CV
## 0.2152815 49382 3.651051 12.70824
##
## $parameters
## test name.t ntr StudentizedRange alpha
## Tukey cell_type 9 4.386509 0.05
##
## $means
## logFI std r Min Max Q25 Q50
## Basal 3.338597 0.5385615 499 2.551993 4.704409 2.877252 3.176311
## CD271+ Ciliated 3.832046 0.3838757 5555 2.552025 4.900796 3.616941 3.858975
## CD66c- Other 3.721810 0.5003153 420 2.593093 5.077571 3.382227 3.801166
## CD66c+ Basal 3.737218 0.5572598 432 2.591476 4.852589 3.324435 3.816204
## CD66c+ Other 3.612335 0.5623695 3233 2.445936 5.048178 3.116385 3.674303
## Ciliated 3.687258 0.4280634 25634 2.370524 5.031748 3.438439 3.734646
## Goblet 3.502913 0.4375353 236 2.563614 4.862660 3.208079 3.576817
## Secretory 3.563014 0.5351201 11766 2.420612 5.009366 3.108390 3.582073
## Triple Negative 3.249685 0.4283957 1616 2.478223 4.744064 2.928697 3.220904
## Q75
## Basal 3.823677
## CD271+ Ciliated 4.096585
## CD66c- Other 4.096538
## CD66c+ Basal 4.174894
## CD66c+ Other 4.064042
## Ciliated 3.978415
## Goblet 3.752905
## Secretory 3.989544
## Triple Negative 3.522732
##
## $comparison
## NULL
##
## $groups
## logFI groups
## CD271+ Ciliated 3.832046 a
## CD66c+ Basal 3.737218 b
## CD66c- Other 3.721810 b
## Ciliated 3.687258 b
## CD66c+ Other 3.612335 c
## Secretory 3.563014 d
## Goblet 3.502913 d
## Basal 3.338597 e
## Triple Negative 3.249685 f
##
## attr(,"class")
## [1] "group"
## [1] "ANOVA of round 7, week 3, nhne_Pneumacult and NP logFI between cell types"
## Df Sum Sq Mean Sq F value Pr(>F)
## cell_type 8 631 78.89 366.5 <2e-16 ***
## Residuals 49382 10631 0.22
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## $statistics
## MSerror Df Mean CV
## 0.2152815 49382 3.651051 12.70824
##
## $parameters
## test name.t ntr StudentizedRange alpha
## Tukey cell_type 9 4.386509 0.05
##
## $means
## logFI std r Min Max Q25 Q50
## Basal 3.338597 0.5385615 499 2.551993 4.704409 2.877252 3.176311
## CD271+ Ciliated 3.832046 0.3838757 5555 2.552025 4.900796 3.616941 3.858975
## CD66c- Other 3.721810 0.5003153 420 2.593093 5.077571 3.382227 3.801166
## CD66c+ Basal 3.737218 0.5572598 432 2.591476 4.852589 3.324435 3.816204
## CD66c+ Other 3.612335 0.5623695 3233 2.445936 5.048178 3.116385 3.674303
## Ciliated 3.687258 0.4280634 25634 2.370524 5.031748 3.438439 3.734646
## Goblet 3.502913 0.4375353 236 2.563614 4.862660 3.208079 3.576817
## Secretory 3.563014 0.5351201 11766 2.420612 5.009366 3.108390 3.582073
## Triple Negative 3.249685 0.4283957 1616 2.478223 4.744064 2.928697 3.220904
## Q75
## Basal 3.823677
## CD271+ Ciliated 4.096585
## CD66c- Other 4.096538
## CD66c+ Basal 4.174894
## CD66c+ Other 4.064042
## Ciliated 3.978415
## Goblet 3.752905
## Secretory 3.989544
## Triple Negative 3.522732
##
## $comparison
## NULL
##
## $groups
## logFI groups
## CD271+ Ciliated 3.832046 a
## CD66c+ Basal 3.737218 b
## CD66c- Other 3.721810 b
## Ciliated 3.687258 b
## CD66c+ Other 3.612335 c
## Secretory 3.563014 d
## Goblet 3.502913 d
## Basal 3.338597 e
## Triple Negative 3.249685 f
##
## attr(,"class")
## [1] "group"
run.anova(fi_dat, 7, 3, "nhbe_Pneumacult", "HA")
## [1] "ANOVA of round 7, week 3, nhbe_Pneumacult and HA logFI between cell types"
## Df Sum Sq Mean Sq F value Pr(>F)
## cell_type 8 631 78.89 366.5 <2e-16 ***
## Residuals 49382 10631 0.22
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## $statistics
## MSerror Df Mean CV
## 0.2152815 49382 3.651051 12.70824
##
## $parameters
## test name.t ntr StudentizedRange alpha
## Tukey cell_type 9 4.386509 0.05
##
## $means
## logFI std r Min Max Q25 Q50
## Basal 3.338597 0.5385615 499 2.551993 4.704409 2.877252 3.176311
## CD271+ Ciliated 3.832046 0.3838757 5555 2.552025 4.900796 3.616941 3.858975
## CD66c- Other 3.721810 0.5003153 420 2.593093 5.077571 3.382227 3.801166
## CD66c+ Basal 3.737218 0.5572598 432 2.591476 4.852589 3.324435 3.816204
## CD66c+ Other 3.612335 0.5623695 3233 2.445936 5.048178 3.116385 3.674303
## Ciliated 3.687258 0.4280634 25634 2.370524 5.031748 3.438439 3.734646
## Goblet 3.502913 0.4375353 236 2.563614 4.862660 3.208079 3.576817
## Secretory 3.563014 0.5351201 11766 2.420612 5.009366 3.108390 3.582073
## Triple Negative 3.249685 0.4283957 1616 2.478223 4.744064 2.928697 3.220904
## Q75
## Basal 3.823677
## CD271+ Ciliated 4.096585
## CD66c- Other 4.096538
## CD66c+ Basal 4.174894
## CD66c+ Other 4.064042
## Ciliated 3.978415
## Goblet 3.752905
## Secretory 3.989544
## Triple Negative 3.522732
##
## $comparison
## NULL
##
## $groups
## logFI groups
## CD271+ Ciliated 3.832046 a
## CD66c+ Basal 3.737218 b
## CD66c- Other 3.721810 b
## Ciliated 3.687258 b
## CD66c+ Other 3.612335 c
## Secretory 3.563014 d
## Goblet 3.502913 d
## Basal 3.338597 e
## Triple Negative 3.249685 f
##
## attr(,"class")
## [1] "group"
Shows replication dynamics of Cal/07/09 in NHBE cells. NHBE cells were differentiated for 3 weeks in Pneumacult media. This came from the round 3 differentiation.
The sequencing files were analyzed with the InVERT pipeline which produces csv files of mRNA, cRNA and vRNA counts for each influenza segment. Comparisons of m/c/vRNA ratios between ciliated and secretory cells, as well as total influenza TPM per cell type were graphed for the paper.
#Specify "not in" function
`%ni%` <- Negate(`%in%`)
meta_3d <- utils::read.csv("../data/figure_3d_metadata.csv", header = TRUE) %>%
mutate(moi = as.factor(moi)) %>%
mutate(replicate = as.factor(replicate)) %>%
mutate(timepoint = as.factor(timepoint))
files_3d <- list.files(path = "../data/invert_data", pattern = ".csv")
files_3d <- paste0("../data/invert_data/", files_3d)
figure_3_dat <- readr::read_csv(files_3d, id = "path", na = "")
## New names:
## Rows: 288 Columns: 10
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," chr
## (1): ...1 dbl (8): FPKM_positive_sense, FPKM_negative_sense,
## mrna:(mrna,crna)_ratio, T...
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## • `` -> `...1`
names(figure_3_dat)[2] <- "gene"
names(figure_3_dat)[5] <- "mrna_to_total_pos_rna"
#Change the NA's that are calculated for positive or negative TPM values (when there are no FPKM values) to zero, except for the spliced genes
figure_3_dat[figure_3_dat$gene %ni% c("NS1", "NEP", "M1", "M2") & is.na(figure_3_dat$TPM_positive_sense), "TPM_positive_sense"] <- 0
figure_3_dat[figure_3_dat$gene %ni% c("NS1", "NEP", "M1", "M2") & is.na(figure_3_dat$TPM_negative_sense), "TPM_negative_sense"] <- 0
#Change NAs that are entered for cRNAs and vRNAs to zero, except for the spliced genes
figure_3_dat[figure_3_dat$gene %ni% c("NS1", "NEP", "M1", "M2") & is.na(figure_3_dat$crna_TPM), "crna_TPM"] <- 0
figure_3_dat[figure_3_dat$gene %ni% c("NS1", "NEP", "M1", "M2") & is.na(figure_3_dat$vrna_TPM), "vrna_TPM"] <- 0
#Change NAs that are entered for mRNAs to zero, except for M and NS which don't exist as mRNAs
figure_3_dat[figure_3_dat$gene %ni% c("M", "NS") & is.na(figure_3_dat$mrna_TPM), "mrna_TPM"] <- 0
figure_3_dat <- merge(meta_3d, figure_3_dat, by = "path") %>%
dplyr::select(.,-c(path,FPKM_positive_sense, FPKM_negative_sense))
figure_3_dat_long <- figure_3_dat %>%
tidyr::gather(key = "RNA_type", value = "TPM", -c(Sample, id, condition, moi, timepoint, cell_type, subset, replicate, gene, mrna_to_total_pos_rna)) %>%
mutate(RNA_type = factor(RNA_type, levels = c("TPM_positive_sense", "TPM_negative_sense", "crna_TPM", "mrna_TPM", "vrna_TPM"))) %>%
mutate(gene = factor(gene, levels = c("PB2", "PB1", "PA", "HA", "NP", "NA", "M", "M1", "M2", "NS", "NS1", "NEP"))) %>%
group_by(cell_type,subset,gene,moi,timepoint,RNA_type) %>%
dplyr::summarise(averageTPM = mean(TPM),
num = n())
## `summarise()` has grouped output by 'cell_type', 'subset', 'gene', 'moi',
## 'timepoint'. You can override using the `.groups` argument.
#Make dataset which plots secretory and ciliated m/c/vRNA levels on the x and y axis
proportions.dat <- figure_3_dat_long %>%
filter(RNA_type != "TPM_positive_sense", RNA_type != "TPM_negative_sense") %>%
mutate(subset = factor(subset)) %>%
mutate(cell_type = factor(cell_type)) %>%
mutate(timepoint = as.numeric(as.character(timepoint))) %>%
filter(!is.na(averageTPM)) %>%
droplevels() %>%
group_by(gene, subset, timepoint) %>%
mutate(total_TPM = sum(averageTPM),
proportion = averageTPM/total_TPM*100)
#Make new dataframe which will compare the ciliated values and secretory values on the x and y axis, respectively
proportion_regression_dat <- proportions.dat %>%
filter(subset == "ciliated") %>%
left_join(proportions.dat %>%
filter(subset == "secretory"),
by = c("gene", "moi", "RNA_type", "timepoint"),
suffix = c(".ciliated", ".secretory")) %>%
mutate(timepoint = factor(timepoint))
Calculating correlation coefficient for combined dataset
cor.test(x = proportion_regression_dat$proportion.ciliated,
y = proportion_regression_dat$proportion.secretory, method = "pearson")
##
## Pearson's product-moment correlation
##
## data: proportion_regression_dat$proportion.ciliated and proportion_regression_dat$proportion.secretory
## t = 68.107, df = 72, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.9878114 0.9951752
## sample estimates:
## cor
## 0.9923281
Actual p value from this test:
cor.test(x = proportion_regression_dat$proportion.ciliated,
y = proportion_regression_dat$proportion.secretory, method = "pearson")$p.value
## [1] 4.059059e-67
mcv_corr_plot <- ggplot(subset(proportion_regression_dat,timepoint != '0'),
aes(x = proportion.ciliated,
y = proportion.secretory)) +
geom_point(aes(shape = RNA_type,
color = gene),
#alpha = timepoint),
size = 3) +
geom_smooth(method = "lm",
color = "black",
linewidth = 0.5,
formula = y ~ x)+
stat_cor(aes(label = ..rr.label..)) +
ggtitle("Correlation of m/c/vRNA proportions in ciliated and secretory cells\n0.5 MOI, JF291/FN02") +
theme_bw() +
theme(axis.text.x = element_text(angle = 45, hjust = 1),
panel.grid.major = element_blank())
mcv_corr_plot
## Warning: The dot-dot notation (`..rr.label..`) was deprecated in ggplot2 3.4.0.
## ℹ Please use `after_stat(rr.label)` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
Comparing total IAV TPM in secretory vs ciliated cells with a T test at both 6 and 12 hours post infection
#Summarize the total IAV TPM
tpm_dat <- figure_3_dat %>%
filter(moi == 0.5) %>%
group_by(subset, replicate, moi, timepoint) %>%
dplyr::summarise(total_pos_TPM = sum(TPM_positive_sense, na.rm = TRUE),
total_neg_TPM = sum(TPM_negative_sense, na.rm = TRUE)) %>%
dplyr::mutate(total_TPM = total_pos_TPM+total_neg_TPM,
subset = factor(subset, levels = c("ciliated", "secretory")))
## `summarise()` has grouped output by 'subset', 'replicate', 'moi'. You can
## override using the `.groups` argument.
#Calculate T tests for each group
ttest.results <- tpm_dat %>%
group_by(timepoint) %>%
do( tidy(t.test(data=., total_TPM~subset)))
ttest.results
## # A tibble: 2 × 11
## # Groups: timepoint [2]
## timepoint estimate estimate1 estimate2 statistic p.value parameter conf.low
## <fct> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 6 -4349. 5409. 9759. -6.89 0.000558 5.74 -5911.
## 2 12 13029. 52425. 39395. 2.04 0.130 3.12 -6847.
## # ℹ 3 more variables: conf.high <dbl>, method <chr>, alternative <chr>
Only differences at 6 hours are statistically significant. Make a bar plot of this data:
#Summarize mean and standard deviation
tpm_graph_dat <- summarySE(data=tpm_dat, measurevar = "total_TPM", groupvars = c("subset", "timepoint", "moi"))
annotation_df <- data.frame(
timepoint = c(6, 12),
start = c("ciliated", "ciliated"),
end = c("secretory", "secretory"),
y = c(60000, 60000),
label = c("p=0.0006", "ns")
)
#Create bar plot which shows mean +/- standard deviation
ggplot(tpm_graph_dat, aes(x = subset, y = total_TPM)) +
geom_bar(aes(fill = subset), stat = "identity") +
geom_errorbar(aes(ymin=total_TPM-sd, ymax=total_TPM+sd),
size=.3, # Thinner lines
width=.2,
position=position_dodge(.9)) +
geom_signif(data = annotation_df,
aes(xmin = start, xmax = end, annotations = label, y_position = y),
manual = TRUE) +
facet_grid(moi~timepoint) +
scale_fill_manual(values = c("#1080FF", "#8000FF")) +
ggtitle("Total IAV transcript counts (TPM)\nciliated vs secretory cells\n0.5 MOI, JF291/FN02") +
labs(x = "Cell type",
y = "Average TPM",
caption = "Error bars show +/- standard deviation\nSignificance calculated with Welch's two sample t-test") +
theme(panel.grid = element_blank(),
panel.background = element_blank(),
axis.line = element_line(colour = "black"),
legend.position = "none")
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Warning in geom_signif(data = annotation_df, aes(xmin = start, xmax = end, :
## Ignoring unknown aesthetics: xmin, xmax, annotations, and y_position
## Warning: Combining variables of class <factor> and <numeric> was deprecated in ggplot2
## 3.4.0.
## ℹ Please ensure your variables are compatible before plotting (location:
## `combine_vars()`)
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Warning: Combining variables of class <numeric> and <factor> was deprecated in ggplot2
## 3.4.0.
## ℹ Please ensure your variables are compatible before plotting (location:
## `combine_vars()`)
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
Data and graph files available upon request (generated in Prism).
Shows PCA generated on infected phenotyping data generated via flow cytometry data, i.e. proportions of infected cell types present in the culture. Proportions were calculated by identifying cell type frequencies within the IAV+ cells in the culture, and scaling each proportion to the total %IAV+ cells (in other words, the proportions of each cell type will add up to the % infection in the culture.)
Points on the PCA are colored according to a supplementary variable of viral burst size, measured via area under the curve of a 36h multicycle growth curve (MCGC). Cultures differentiated for 1 or 3 weeks were infected with Cal/07/09 (MOI=0.1). Virus was harvested and plaqued on MDCK cells at 0, 12, 24, and 36 hours post infection. The PFU/total cell was calculated from each timepoint, and area under the curve was calculated as an average of n=5 technical replicates. The following culture/treatments are shown in the figure 4 data:
Cells differentiated for 3 weeks: * Pneumacult (PC) treated NHBE cells, infected at MOI=0.1 (rounds 3, 4, 7, 9) * PC + IL-13 low treated NHBE cells, infected at MOI=0.1 (rounds 3, 4, 7, 9) * PC + IL-13 high treated NHBE cells, infected at MOI=0.1 (rounds 3, 4, 7, 9) * PC + DAPT treated NHBE cells, infected at MOI=0.1 (rounds 7 and 9) * Lonza treated NHBE cells, infected at MOI=0.1 (rounds 7 and 9) * PC treated NHNE cells, infected at MOI=0.1 (round 7 only)
Cells differentiated for 1 week: * PC treated NHBE cells, infected at MOI=0.1 (round 7 only) * Lonza treated NHBE cells, infected at MOI=0.1 (round 7 only)
figure_4_dat <- read.csv("../data/nhbe_burst_size_data.csv", header = TRUE, row.names = 1)
scaled_inf_pca <- prcomp(figure_4_dat[,c(14:22)],
scale = TRUE)
scaled_inf_pca_plot <- fviz_pca_biplot(scaled_inf_pca, repel = TRUE,
col.ind = figure_4_dat$pfu_per_cell_36h_auc,
gradient.cols = c("blue", "red"), # Variables color
title = "PCA biplot of *scaled* IAV+ phenotyping\nNo round 5 or week 0\n36 hr AUC",
legend.title = list(color = "AUC 36h"))
scaled_inf_pca_plot
Shows similar PCA to 4B, but PCA is performed on the uninfected phenotyping data. This phenotyping data largely overlaps with the data described in figure 2B, with the addition of a 1 week differentiated culture that was grown in Lonza media.
uninfected_pca <- prcomp(figure_4_dat[,c(5:13)],
scale = TRUE)
uninfected_pca_plot <- fviz_pca_biplot(uninfected_pca, repel = TRUE,
col.ind = figure_4_dat$pfu_per_cell_36h_auc,
gradient.cols = c("blue", "red"),
title = "PCA biplot of pre-infection phenotyping\n36 hr AUC",
legend.title = list(color = "AUC 36h"))
uninfected_pca_plot